# -*- coding: utf-8 -*-
"""
Created on Mon May 29 08:45:33 2018

@author: 向姚冰
"""
import os 
import random
from PIL import Image
from keras.models import Sequential,load_model
from keras.layers import LSTM,Dense,Activation,SimpleRNN,Conv2D,MaxPool2D,Flatten,Reshape,Dropout
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.metrics import categorical_accuracy
from keras.optimizers import RMSprop
import numpy as np
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


def load_data(path1, path2):
    """Load images from two class directories and build binary labels.

    Args:
        path1: directory holding the class-0 images.
        path2: directory holding the class-1 images.

    Returns:
        (x, y): ``x`` is a numpy array stacking every image (assumes all
        images share the same shape — TODO confirm against the dataset);
        ``y`` is an int array with 0 for each ``path1`` file followed by
        1 for each ``path2`` file, in directory-listing order.
    """
    filelist1 = [os.path.join(path1, f) for f in os.listdir(path1)]
    filelist2 = [os.path.join(path2, f) for f in os.listdir(path2)]
    # Read every image into a numpy array; class-0 files first, then
    # class-1, so the labels built below line up with the data.
    images = [np.array(Image.open(img)) for img in filelist1 + filelist2]
    x = np.array(images)
    # Labels: zeros for the first class, ones for the second.  np.zeros /
    # np.ones already hold the right values, so no per-element loop is
    # needed (the original filled an all-zeros array element by element).
    y = np.concatenate([np.zeros(len(filelist1), dtype=int),
                        np.ones(len(filelist2), dtype=int)])
    return x, y

# ---------------------------------------------------------------------------
# Module-level data preparation.
# NOTE(review): this runs file I/O at import time; consider moving it under
# the __main__ guard so the module can be imported without the dataset.
nb_classes = 2
path1 = './0'
path2 = './1'
data, labels = load_data(path1, path2)
# Hold out 10% of the samples as the test split.
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.1)
# Pass nb_classes explicitly (it was previously unused) so both splits are
# one-hot encoded with 2 columns even if a split happens to contain only
# one class.
y_train = np_utils.to_categorical(y_train, nb_classes)
y_test = np_utils.to_categorical(y_test, nb_classes)

def build_model():
  """Assemble the VGG-style CNN used for binary image classification.

  Five convolutional blocks (each: two 3x3 same-padded relu convolutions
  followed by a 2x2 max-pool) feed a classifier head of three
  dropout-regularised dense layers and a 2-way softmax output.

  Returns:
      An uncompiled keras ``Sequential`` model expecting 168x168 RGB input.
  """
  model = Sequential()
  # Filter counts for the five convolutional blocks.
  for block_index, n_filters in enumerate((16, 32, 64, 128, 128)):
    conv_kwargs = dict(filters=n_filters, kernel_size=(3, 3),
                       padding='Same', activation='relu')
    if block_index == 0:
      # Only the very first layer declares the input tensor shape.
      model.add(Conv2D(input_shape=(168, 168, 3), **conv_kwargs))
    else:
      model.add(Conv2D(**conv_kwargs))
    model.add(Conv2D(**conv_kwargs))
    # Default strides equal the pool size, i.e. (2, 2).
    model.add(MaxPool2D(pool_size=(2, 2)))
  # Classifier head: flatten, then dense layers with 20% dropout each to
  # limit overfitting.
  model.add(Flatten())
  for units in (128, 64, 64):
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(0.2))
  # Output layer: one softmax unit per class.
  model.add(Dense(2, activation='softmax'))
  return model

#*************************************************************
def rand_train(data, labels):
    """Bootstrap-sample a training set the same size as the input.

    Draws ``len(labels)`` indices uniformly at random with replacement and
    returns the corresponding data/label pairs as numpy arrays (the
    standard bagging resampling step).

    Args:
        data: indexable sequence of samples.
        labels: indexable sequence of labels, same length as ``data``.

    Returns:
        (train_data, train_label): numpy arrays of the resampled pairs.
    """
    sample_count = len(labels)
    # One uniform index per sample; random.randint is inclusive on both ends.
    indices = [random.randint(0, sample_count - 1) for _ in range(sample_count)]
    sampled_data = np.array([data[idx] for idx in indices])
    sampled_labels = np.array([labels[idx] for idx in indices])
    return sampled_data, sampled_labels

def bagging_by_tree(x_train, y_train, x_test, y_test, t=10):
    """Train ``t`` bootstrapped CNN base learners and collect predictions.

    For each learner: draw a bootstrap sample, train a fresh model with
    early stopping, checkpoint the best weights by validation loss, reload
    that checkpoint, and predict classes on ``x_test``.

    Args:
        x_train, y_train: training data and one-hot labels.
        x_test, y_test: test data and one-hot labels (returned unchanged).
        t: number of base learners (default 10).

    Returns:
        (predict_list, y_test): ``predict_list`` has shape (t, 1, n_test) —
        the extra singleton dimension is what ``calc_error`` expects.
    """
    predict_list = []
    for i in range(t):
        # Fresh bootstrap sample for each base learner.
        train_data, train_label = rand_train(x_train, y_train)
        # BUG FIX: the version label used to be assigned to the same name as
        # the model and was immediately overwritten, so the print showed the
        # model object instead of the label.
        version_name = 'model_v' + str(i)
        print(version_name)
        model = build_model()
        best_weights_filepath = './best_weights_v' + str(i) + '.h5'
        earlyStopping = EarlyStopping(monitor='val_loss', patience=10,
                                      verbose=1, mode='auto')
        saveBestModel = ModelCheckpoint(best_weights_filepath,
                                        monitor='val_loss', verbose=1,
                                        save_best_only=True, mode='auto')
        model.compile(loss='categorical_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        model.fit(train_data, train_label, validation_split=0.1, verbose=1,
                  batch_size=50, nb_epoch=30,
                  callbacks=[earlyStopping, saveBestModel])
        # Reload the checkpoint with the lowest validation loss (reuse the
        # path variable instead of rebuilding the string).
        model = load_model(best_weights_filepath)
        y_predicted = model.predict_classes(x_test)
        # Wrap in a singleton list so predict_list keeps shape (t, 1, n_test).
        predict_list.append([y_predicted])
    return predict_list, y_test

def calc_error(predict_list, test_label):
    """Majority-vote the base learners' predictions and compute error rate.

    Args:
        predict_list: nested sequence of shape (m, 1, k) — m base learners,
            each holding k binary class predictions (0 or 1) on the test set.
        test_label: one-hot labels of shape (k, 2); column 1 is the true
            binary class.

    Returns:
        Fraction of the k test samples whose majority-vote prediction
        disagrees with the true class.
    """
    m, n, k = np.shape(predict_list)
    # Majority vote: average the m binary votes and round to the nearest
    # class.  BUG FIX: the divisor was hard-coded to 10; using m makes the
    # vote correct for any ensemble size t.
    vote = np.rint(np.sum(predict_list, axis=0) / m)
    # Count disagreements with the true class (column 1 of the one-hot row).
    error_count = sum(1 for i in range(k) if vote[0][i] != test_label[i][1])
    return error_count / k

if __name__ == "__main__":
    # NOTE(review): bagging_by_tree already bootstraps x_train/y_train once
    # per base learner, so this extra rand_train resamples the data twice —
    # confirm the double bootstrap is intentional.
    train_data,train_label = rand_train(x_train,y_train)
    # Train 10 base learners and gather their test-set predictions.
    predict_list , test_label = bagging_by_tree(train_data,train_label,x_test,y_test,t=10)
    # Report the bagged ensemble's error rate on the held-out split.
    print("Bagging错误率：",calc_error(predict_list,test_label))